Spaces:

adelevett
/

Flashcard2Audio

Sleeping

App Files Files Community

adelevett committed on Feb 5

Commit

bab49e8

verified ·

1 Parent(s): 4fc3ae4

Upload 4 files

Browse files

Files changed (4) hide show

README.md +9 -6
app.py +376 -0
packages.txt +2 -0
requirements.txt +12 -0

README.md CHANGED Viewed

@@ -1,14 +1,17 @@
 ---
 title: Flashcard2Audio
-emoji: 👀
-colorFrom: green
 colorTo: purple
 sdk: gradio
-sdk_version: 6.5.1
 app_file: app.py
 pinned: false
-license: mit
-short_description: Add audio files to existing Anki decks
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Flashcard2Audio
+emoji: 🎴
+colorFrom: blue
 colorTo: purple
 sdk: gradio
+sdk_version: 4.0.0
 app_file: app.py
 pinned: false
+python_version: "3.10"
 ---
+# Flashcard2Audio
+Offline Neural TTS Audio Generator for Anki Flashcards
+Supports CSV and APKG input with smart media preservation.

app.py ADDED Viewed

	@@ -0,0 +1,376 @@

+import gradio as gr
+import pandas as pd
+import genanki
+import pocket_tts
+import tempfile
+import os
+import shutil
+import random
+import zipfile
+import sqlite3
+import re
+import time
+import json
+from pathlib import Path
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pydub import AudioSegment
+# --- Configuration ---
+# Size of the thread pool that process_dataframe uses for TTS generation.
+MAX_WORKERS = 2        # Keep low for HF Spaces (CPU/RAM constraint)
+# Maximum number of rows handed to the preview table (df.head(PREVIEW_LIMIT)).
+PREVIEW_LIMIT = 100    # UI safety cap
+# Minimum interval between progress-bar updates while cards are processed.
+PROGRESS_THROTTLE = 1.0 # Seconds between UI updates
+# --- Helpers ---
+def clean_text_for_tts(text):
+    """Deep cleaning for TTS input only."""
+    if pd.isna(text): return ""
+    text = str(text)
+    # Remove HTML tags
+    text = re.sub(re.compile('<.*?>'), '', text)
+    # Remove Anki sound tags
+    text = re.sub(r'\[sound:.*?\]', '', text)
+    # Remove mustache templates
+    text = re.sub(r'\{\{.*?\}\}', '', text)
+    return text.strip()
+def has_existing_audio(text):
+    """Check if text already contains an Anki sound tag."""
+    if pd.isna(text): return False
+    return bool(re.search(r'\[sound:.*?\]', str(text)))
+# Load the TTS model once at import time and keep it in the module-level
+# TTS_MODEL so every request reuses the same instance.
+# NOTE(review): this assumes pocket_tts exposes a module-level load_model();
+# confirm against the installed package, because any failure here is caught
+# below and silently degrades every generated clip to silence.
+print("Loading TTS Model...")
+try:
+    TTS_MODEL = pocket_tts.load_model()
+    print("Model Loaded Successfully.")
+except Exception as e:
+    # Keep the app importable when the model cannot be loaded; with
+    # TTS_MODEL = None, generate_audio_for_row writes 500 ms silent clips.
+    print(f"CRITICAL ERROR loading model: {e}")
+    TTS_MODEL = None
+def wav_to_mp3(src_wav, dst_mp3):
+    AudioSegment.from_wav(src_wav).export(dst_mp3, format="mp3", bitrate="64k")
+def generate_audio_for_row(q_text, a_text, idx, tmpdir, mode):
+    """
+    Generates audio. Returns (path_q, path_a).
+    Returns 'SKIP' if audio exists and we are preserving it.
+    """
+    q_out, a_out = None, None
+    # Logic for handling modes
+    # Mode 0: Smart Fill (Preserve Existing)
+    # Mode 1: Overwrite All
+    overwrite = (mode == "Generate all new audio (Overwrite)")
+    # --- Question Processing ---
+    if not overwrite and has_existing_audio(q_text):
+        q_out = "SKIP"
+    else:
+        q_wav = os.path.join(tmpdir, f"q_{idx}.wav")
+        try:
+            clean = clean_text_for_tts(q_text)
+            if clean and TTS_MODEL:
+                pocket_tts.generate_to_file(TTS_MODEL, clean, q_wav)
+                q_out = q_wav
+            else:
+                AudioSegment.silent(duration=500).export(q_wav, format="wav")
+                q_out = q_wav
+        except Exception as e:
+            print(f"TTS Error Q row {idx}: {e}")
+            # Fallback to silence to keep deck integrity
+            AudioSegment.silent(duration=500).export(q_wav, format="wav")
+            q_out = q_wav
+    # --- Answer Processing ---
+    if not overwrite and has_existing_audio(a_text):
+        a_out = "SKIP"
+    else:
+        a_wav = os.path.join(tmpdir, f"a_{idx}.wav")
+        try:
+            clean = clean_text_for_tts(a_text)
+            if clean and TTS_MODEL:
+                pocket_tts.generate_to_file(TTS_MODEL, clean, a_wav)
+                a_out = a_wav
+            else:
+                AudioSegment.silent(duration=500).export(a_wav, format="wav")
+                a_out = a_wav
+        except Exception as e:
+            print(f"TTS Error A row {idx}: {e}")
+            AudioSegment.silent(duration=500).export(a_wav, format="wav")
+            a_out = a_wav
+    return q_out, a_out
+def parse_file(file_obj):
+    if file_obj is None:
+        return None, None, None, "No file uploaded", "", None
+    ext = Path(file_obj.name).suffix.lower()
+    df = pd.DataFrame()
+    extract_root = None # Directory where we keep original media
+    has_media = False
+    try:
+        if ext == ".csv":
+            df = pd.read_csv(file_obj.name)
+            if len(df.columns) < 2:
+                df = pd.read_csv(file_obj.name, header=None)
+            if len(df.columns) < 2:
+                return None, None, None, "CSV error: Need 2 columns", "", None
+            df = df.iloc[:, :2]
+            df.columns = ["Question", "Answer"]
+        elif ext == ".apkg" or ext == ".zip":
+            # Extract to a PERSISTENT temp dir (passed to state)
+            extract_root = tempfile.mkdtemp()
+            with zipfile.ZipFile(file_obj.name, 'r') as z:
+                z.extractall(extract_root)
+            # Check for existing media (rough check)
+            media_dir = os.path.join(extract_root, "media")
+            has_media = os.path.exists(media_dir) or any(f.isdigit() for f in os.listdir(extract_root))
+            col_path = os.path.join(extract_root, "collection.anki2")
+            if not os.path.exists(col_path):
+                shutil.rmtree(extract_root)
+                return None, None, None, "Invalid APKG: No collection.anki2", "", None
+            conn = sqlite3.connect(col_path)
+            cur = conn.cursor()
+            cur.execute("SELECT flds FROM notes")
+            rows = cur.fetchall()
+            data = []
+            for r in rows:
+                flds = r[0].split('\x1f')
+                q = flds[0] if len(flds) > 0 else ""
+                a = flds[1] if len(flds) > 1 else ""
+                data.append([q, a])
+            df = pd.DataFrame(data, columns=["Question", "Answer"])
+            conn.close()
+        else:
+            return None, None, None, "Unsupported file type", "", None
+        df = df.fillna("")
+        msg = f"✅ Loaded {len(df)} cards."
+        if has_media:
+            msg += " 🎵 Existing media detected."
+        return df, has_media, df.head(PREVIEW_LIMIT), msg, estimate_time(len(df)), extract_root
+    except Exception as e:
+        if extract_root and os.path.exists(extract_root):
+            shutil.rmtree(extract_root)
+        return None, None, None, f"Error: {str(e)}", "", None
+def estimate_time(num_cards):
+    """Rough estimate: 2s per card"""
+    seconds = num_cards * 2.0
+    if seconds < 60: return f"~{int(seconds)}s"
+    return f"~{int(seconds/60)} min"
+def process_dataframe(df_full, search_term, extract_root, mode, progress=gr.Progress()):
+    if df_full is None or len(df_full) == 0:
+        return None, "No data"
+    # Filter logic
+    if search_term:
+        mask = df_full.astype(str).apply(lambda x: x.str.contains(search_term, case=False)).any(axis=1)
+        df = df_full[mask]
+    else:
+        df = df_full
+    if len(df) == 0:
+        return None, "No matching cards"
+    # Setup
+    work_dir = tempfile.mkdtemp()
+    media_files = []
+    try:
+        # --- Media Preservation Logic ---
+        if extract_root:
+            media_map_path = os.path.join(extract_root, "media")
+            if os.path.exists(media_map_path) and os.path.getsize(media_map_path) > 0:
+                try:
+                    with open(media_map_path, 'r') as f:
+                        # Fix: Handle potentially malformed JSON gracefully
+                        content = f.read().strip()
+                        if content:
+                            media_map = json.loads(content) # {"0": "my_audio.mp3", ...}
+                            # Rename files in extract_root back to original names
+                            for k, v in media_map.items():
+                                src = os.path.join(extract_root, k)
+                                dst = os.path.join(extract_root, v)
+                                if os.path.exists(src):
+                                    # Rename enables genanki to find them by name
+                                    os.rename(src, dst)
+                                    media_files.append(dst)
+                        else:
+                            print("Warning: Media map file is empty.")
+                except Exception as e:
+                    print(f"Warning: Could not restore existing media: {e}")
+        # --- Genanki Setup ---
+        model_id = random.randrange(1 << 30, 1 << 31)
+        my_model = genanki.Model(
+            model_id, 'PocketTTS Model',
+            fields=[{'name': 'Question'}, {'name': 'Answer'}],
+            templates=[{
+                'name': 'Card 1',
+                'qfmt': '{{Question}}<br>{{AudioQ}}',
+                'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}<br>{{AudioA}}',
+            }])
+        my_deck = genanki.Deck(random.randrange(1 << 30, 1 << 31), 'Pocket TTS Deck')
+        # --- Execution ---
+        total = len(df)
+        completed = 0
+        last_update_time = 0
+        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as exe:
+            futures = {}
+            for idx, row in df.iterrows():
+                f = exe.submit(generate_audio_for_row, str(row['Question']), str(row['Answer']), idx, work_dir, mode)
+                futures[f] = idx
+            for future in as_completed(futures):
+                idx = futures[future]
+                try:
+                    q_res, a_res = future.result()
+                    # --- Field Construction (Corrected) ---
+                    q_original = str(df.iloc[idx]['Question'])
+                    q_field = q_original
+                    # Update Question
+                    if q_res and q_res != "SKIP":
+                        q_mp3 = str(Path(q_res).with_suffix('.mp3'))
+                        wav_to_mp3(q_res, q_mp3)
+                        os.remove(q_res) # clean wav
+                        media_files.append(q_mp3)
+                        # Remove OLD sound tags first to avoid duplicates
+                        q_field = re.sub(r'\[sound:.*?\]', '', q_field)
+                        q_field = q_field.strip() + f"<br>[sound:{os.path.basename(q_mp3)}]"
+                    # Update Answer
+                    a_original = str(df.iloc[idx]['Answer'])
+                    a_field = a_original
+                    if a_res and a_res != "SKIP":
+                        a_mp3 = str(Path(a_res).with_suffix('.mp3'))
+                        wav_to_mp3(a_res, a_mp3)
+                        os.remove(a_res) # clean wav
+                        media_files.append(a_mp3)
+                        # Remove OLD sound tags first
+                        a_field = re.sub(r'\[sound:.*?\]', '', a_field)
+                        a_field = a_field.strip() + f"<br>[sound:{os.path.basename(a_mp3)}]"
+                    # Add Note
+                    note = genanki.Note(
+                        model=my_model,
+                        fields=[q_field, a_field]
+                    )
+                    my_deck.add_note(note)
+                except Exception as e:
+                    print(f"Row {idx} failed: {e}")
+                # --- Throttled Progress ---
+                completed += 1
+                current_time = time.time()
+                if completed == total or (current_time - last_update_time) > PROGRESS_THROTTLE:
+                    progress(completed / total, desc=f"Processed {completed}/{total}")
+                    last_update_time = current_time
+        # --- Package ---
+        package = genanki.Package(my_deck)
+        # Deduplicate media files list
+        package.media_files = list(set(media_files))
+        raw_out = os.path.join(work_dir, "output.apkg")
+        package.write_to_file(raw_out)
+        final_out = os.path.join(tempfile.gettempdir(), f"pocket_deck_{random.randint(1000,9999)}.apkg")
+        shutil.copy(raw_out, final_out)
+        return final_out, f"✅ Done! Packaged {len(package.media_files)} audio files."
+    except Exception as e:
+        return None, f"Critical Error: {str(e)}"
+    finally:
+        # --- Guaranteed Cleanup ---
+        if os.path.exists(work_dir):
+            shutil.rmtree(work_dir)
+        # Also clean up the input extraction root if it exists
+        if extract_root and os.path.exists(extract_root):
+            shutil.rmtree(extract_root)
+# --- UI ---
+with gr.Blocks(title="Pocket TTS Anki") as app:
+    gr.Markdown("## 🎴 Pocket TTS Anki Generator")
+    gr.Markdown("Offline Neural Audio. Supports CSV and APKG (smart media preservation).")
+    # State variables
+    full_df_state = gr.State()
+    extract_root_state = gr.State() # Holds path to unzipped APKG
+    with gr.Row():
+        file_input = gr.File(label="Upload (CSV/APKG)", file_types=[".csv", ".apkg", ".zip"])
+        status = gr.Textbox(label="Status", interactive=False)
+        eta_box = gr.Textbox(label="Est. Time", interactive=False)
+    with gr.Row():
+        search_box = gr.Textbox(label="Filter (Optional)", placeholder="Process subset...")
+        # New 3-Way Toggle
+        mode_radio = gr.Radio(
+            choices=[
+                "Smart Fill (Preserve Existing)",
+                "Generate all new audio (Overwrite)",
+                "Only generate missing (Same as Smart Fill)"
+            ],
+            value="Smart Fill (Preserve Existing)",
+            label="Generation Mode"
+        )
+    preview_table = gr.Dataframe(label="Preview (First 100)", interactive=False, height=300)
+    with gr.Row():
+        btn = gr.Button("🚀 Generate Deck", variant="primary")
+        dl = gr.File(label="Download")
+    result_lbl = gr.Textbox(label="Result", interactive=False)
+    def on_upload(file):
+        # Returns: df, has_media, preview, msg, eta, extract_path
+        df, _, preview, msg, eta, ext_path = parse_file(file)
+        return df, preview, msg, eta, ext_path
+    file_input.upload(on_upload, inputs=file_input,
+                      outputs=[full_df_state, preview_table, status, eta_box, extract_root_state])
+    def on_search(term, df):
+        if df is None: return None
+        if not term: return df.head(PREVIEW_LIMIT)
+        mask = df.astype(str).apply(lambda x: x.str.contains(term, case=False)).any(axis=1)
+        return df[mask].head(PREVIEW_LIMIT)
+    search_box.change(on_search, inputs=[search_box, full_df_state], outputs=preview_table)
+    btn.click(process_dataframe,
+              inputs=[full_df_state, search_box, extract_root_state, mode_radio],
+              outputs=[dl, result_lbl])
+if __name__ == "__main__":
+    app.queue(max_size=2).launch(server_name="0.0.0.0", server_port=7860)

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ ffmpeg
2	+ libsndfile1

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+# PyTorch: the extra index below provides the CPU-only builds.
+--extra-index-url https://download.pytorch.org/whl/cpu
+# On Linux (HF Spaces), torch is installed as a CPU build from that extra index.
+torch>=2.5.0
+gradio>=4.0.0
+pandas
+genanki
+pydub
+# Pocket TTS is not on PyPI - must install from GitHub
+git+https://github.com/kyutai-labs/pocket-tts.git