Spaces:

adelevett
/

Flashcard2Audio

Sleeping

App Files Files Community

adelevett committed on Feb 6

Commit

0eb1322

verified ·

1 Parent(s): 6f37f73

Upload 4 files

Browse files

Files changed (1) hide show

app.py +140 -23

app.py CHANGED Viewed

@@ -118,6 +118,33 @@ def generate_audio_for_row(q_text, a_text, idx, tmpdir, mode):
     return q_out, a_out
 def parse_file(file_obj):
     if file_obj is None:
         return None, None, None, "No file uploaded", "", None
@@ -137,6 +164,7 @@ def parse_file(file_obj):
             df = df.iloc[:, :2]
             df.columns = ["Question", "Answer"]
         elif ext == ".apkg" or ext == ".zip":
             # Extract to a PERSISTENT temp dir (passed to state)
@@ -155,17 +183,18 @@ def parse_file(file_obj):
             conn = sqlite3.connect(col_path)
             cur = conn.cursor()
-            cur.execute("SELECT flds FROM notes")
             rows = cur.fetchall()
             data = []
             for r in rows:
                 flds = r[0].split('\x1f')
                 q = flds[0] if len(flds) > 0 else ""
                 a = flds[1] if len(flds) > 1 else ""
-                data.append([q, a])
-            df = pd.DataFrame(data, columns=["Question", "Answer"])
             conn.close()
         else:
@@ -177,18 +206,38 @@ def parse_file(file_obj):
         if has_media:
             msg += " 🎵 Existing media detected."
-        return df, has_media, df.head(PREVIEW_LIMIT), msg, estimate_time(len(df)), extract_root
     except Exception as e:
         if extract_root and os.path.exists(extract_root):
             shutil.rmtree(extract_root)
         return None, None, None, f"Error: {str(e)}", "", None
-def estimate_time(num_cards):
-    """Rough estimate: 2s per card"""
-    seconds = num_cards * 2.0
-    if seconds < 60: return f"~{int(seconds)}s"
-    return f"~{int(seconds/60)} min"
 def process_dataframe(df_full, search_term, extract_root, mode, progress=gr.Progress()):
     if df_full is None or len(df_full) == 0:
@@ -240,8 +289,8 @@ def process_dataframe(df_full, search_term, extract_root, mode, progress=gr.Prog
             fields=[{'name': 'Question'}, {'name': 'Answer'}],
             templates=[{
                 'name': 'Card 1',
-                'qfmt': '{{Question}}<br>{{AudioQ}}',
-                'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}<br>{{AudioA}}',
             }])
         my_deck = genanki.Deck(random.randrange(1 << 30, 1 << 31), 'Pocket TTS Deck')
@@ -347,8 +396,20 @@ with gr.Blocks(title="Pocket TTS Anki") as app:
         eta_box = gr.Textbox(label="Est. Time", interactive=False)
     with gr.Row():
-        search_box = gr.Textbox(label="Filter (Optional)", placeholder="Process subset...")
         # New 3-Way Toggle
         mode_radio = gr.Radio(
             choices=[
@@ -360,7 +421,11 @@ with gr.Blocks(title="Pocket TTS Anki") as app:
             label="Generation Mode"
         )
-    preview_table = gr.Dataframe(label="Preview (First 100)", interactive=False)
     with gr.Row():
         btn = gr.Button("🚀 Generate Deck", variant="primary")
@@ -368,21 +433,73 @@ with gr.Blocks(title="Pocket TTS Anki") as app:
     result_lbl = gr.Textbox(label="Result", interactive=False)
     def on_upload(file):
         # Returns: df, has_media, preview, msg, eta, extract_path
-        df, _, preview, msg, eta, ext_path = parse_file(file)
-        return df, preview, msg, eta, ext_path
     file_input.upload(on_upload, inputs=file_input,
-                      outputs=[full_df_state, preview_table, status, eta_box, extract_root_state])
-    def on_search(term, df):
-        if df is None: return None
-        if not term: return df.head(PREVIEW_LIMIT)
-        mask = df.astype(str).apply(lambda x: x.str.contains(term, case=False)).any(axis=1)
-        return df[mask].head(PREVIEW_LIMIT)
-    search_box.change(on_search, inputs=[search_box, full_df_state], outputs=preview_table)
     btn.click(process_dataframe,
               inputs=[full_df_state, search_box, extract_root_state, mode_radio],

     return q_out, a_out
def strip_html_for_display(text, max_length=200):
    """Return a plain-text, length-limited rendering of *text* for the preview.

    HTML tags are removed, HTML entities are decoded, surrounding whitespace
    is trimmed, and the result is cut to ``max_length`` characters with a
    trailing ``'...'`` when it is longer.

    Args:
        text: Cell value to clean. Missing values (``None``/NaN) and the
            empty string yield ``""``; anything else is converted with
            ``str()``.
        max_length: Maximum number of visible characters kept before the
            ``'...'`` suffix is appended.

    Returns:
        The cleaned string.
    """
    # Local import keeps this function self-contained; the module's import
    # block is not visible from here.
    import html

    if pd.isna(text) or text == "":
        return ""
    text = str(text)
    # Remove anything that looks like an HTML tag.
    text = re.sub(r'<[^>]+>', '', text)
    # Decode every entity (named and numeric), not just a hand-picked few.
    # html.unescape maps &nbsp; to U+00A0, so fold that into a plain space.
    text = html.unescape(text).replace('\xa0', ' ')
    # Trim before measuring so leading whitespace does not eat into the limit.
    text = text.strip()
    if len(text) > max_length:
        text = text[:max_length] + '...'
    return text
def extract_unique_tags(df):
    """Build the tag-filter choices from the ``Tags`` column of *df*.

    Each cell is expected to hold whitespace-separated tags, e.g.
    ``" MK_MathematicsKnowledge "``.

    Args:
        df: DataFrame with an optional ``Tags`` column, or ``None``.

    Returns:
        ``["All"]`` followed by the distinct tags in sorted order. Only
        ``["All"]`` is returned when *df* is ``None`` or has no ``Tags``
        column.
    """
    if df is None or 'Tags' not in df.columns:
        return ["All"]
    all_tags = set()
    for tag_str in df['Tags']:
        # Missing cells may be None or NaN; NaN is truthy, so test the type
        # instead of truthiness before calling str methods.
        if isinstance(tag_str, str):
            # split() with no argument already trims and drops empty pieces.
            all_tags.update(tag_str.split())
    # "All" is the sentinel for "no tag filter"; do not list it twice.
    all_tags.discard("All")
    return ["All"] + sorted(all_tags)
 def parse_file(file_obj):
     if file_obj is None:
         return None, None, None, "No file uploaded", "", None
             df = df.iloc[:, :2]
             df.columns = ["Question", "Answer"]
+            df['Tags'] = ""  # CSV files don't have tags
         elif ext == ".apkg" or ext == ".zip":
             # Extract to a PERSISTENT temp dir (passed to state)
             conn = sqlite3.connect(col_path)
             cur = conn.cursor()
+            cur.execute("SELECT flds, tags FROM notes")
             rows = cur.fetchall()
             data = []
             for r in rows:
                 flds = r[0].split('\x1f')
+                tags = r[1].strip() if len(r) > 1 else ""
                 q = flds[0] if len(flds) > 0 else ""
                 a = flds[1] if len(flds) > 1 else ""
+                data.append([q, a, tags])
+            df = pd.DataFrame(data, columns=["Question", "Answer", "Tags"])
             conn.close()
         else:
         if has_media:
             msg += " 🎵 Existing media detected."
+        return df, has_media, df.head(PREVIEW_LIMIT), msg, estimate_time(len(df), has_media), extract_root
     except Exception as e:
         if extract_root and os.path.exists(extract_root):
             shutil.rmtree(extract_root)
         return None, None, None, f"Error: {str(e)}", "", None
def estimate_time(num_cards, has_existing_media=False, mode="Smart Fill (Preserve Existing)"):
    """Return a short human-readable processing-time estimate.

    The estimate uses a benchmark of ~4.7s per card for full audio
    generation. When the deck already contains media and a "Smart Fill" mode
    is selected, the per-card cost is halved on the assumption that many
    cards already have audio.

    Args:
        num_cards: Number of cards to process. ``None``, zero or negative
            values yield ``"0s"``.
        has_existing_media: Whether the uploaded deck already contains media.
        mode: Label of the selected generation mode. Non-string values
            (e.g. ``None`` when nothing is selected) are treated as "not
            Smart Fill".

    Returns:
        A string such as ``"~47s"``, ``"~7 min"``, ``"~1h 18m"`` or ``"~2h"``.
    """
    if not num_cards or num_cards < 0:
        return "0s"

    # Base benchmark: 4.7s per card for full audio generation.
    seconds_per_card = 4.7
    # Guard the substring test so a missing mode cannot raise TypeError.
    smart_fill = isinstance(mode, str) and "Smart Fill" in mode
    if has_existing_media and smart_fill:
        seconds_per_card *= 0.5

    seconds = num_cards * seconds_per_card
    if seconds < 60:
        return f"~{int(seconds)}s"

    total_minutes = int(seconds) // 60
    if total_minutes < 60:
        return f"~{total_minutes} min"

    hours, mins = divmod(total_minutes, 60)
    return f"~{hours}h {mins}m" if mins > 0 else f"~{hours}h"
 def process_dataframe(df_full, search_term, extract_root, mode, progress=gr.Progress()):
     if df_full is None or len(df_full) == 0:
             fields=[{'name': 'Question'}, {'name': 'Answer'}],
             templates=[{
                 'name': 'Card 1',
+                'qfmt': '{{Question}}',
+                'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}',
             }])
         my_deck = genanki.Deck(random.randrange(1 << 30, 1 << 31), 'Pocket TTS Deck')
         eta_box = gr.Textbox(label="Est. Time", interactive=False)
     with gr.Row():
+        search_box = gr.Textbox(label="Search Text", placeholder="Enter text to search...")
+        search_field = gr.Radio(
+            choices=["Both", "Question Only", "Answer Only"],
+            value="Both",
+            label="Search In"
+        )
+        tag_dropdown = gr.Dropdown(
+            label="Filter by Tag",
+            choices=["All"],
+            value="All",
+            interactive=True
+        )
+    with gr.Row():
         # New 3-Way Toggle
         mode_radio = gr.Radio(
             choices=[
             label="Generation Mode"
         )
+    preview_table = gr.Dataframe(
+        label="Preview (First 100)",
+        interactive=False,
+        column_widths=["30%", "45%", "25%"]
+    )
     with gr.Row():
         btn = gr.Button("🚀 Generate Deck", variant="primary")
     result_lbl = gr.Textbox(label="Result", interactive=False)
+    has_media_state = gr.State(False)
     def on_upload(file):
         # Returns: df, has_media, preview, msg, eta, extract_path
+        df, has_media, preview, msg, eta, ext_path = parse_file(file)
+        # Extract tags and create cleaned preview
+        tag_choices = extract_unique_tags(df)
+        if df is not None:
+            display_df = df.copy()
+            display_df['Question'] = display_df['Question'].apply(strip_html_for_display)
+            display_df['Answer'] = display_df['Answer'].apply(strip_html_for_display)
+            clean_preview = display_df.head(PREVIEW_LIMIT)
+        else:
+            clean_preview = preview
+        return (
+            df,                                                      # full_df_state
+            has_media,                                               # has_media_state
+            clean_preview,                                           # preview_table
+            msg,                                                     # status
+            eta,                                                     # eta_box
+            ext_path,                                                # extract_root_state
+            gr.Dropdown(choices=tag_choices, value="All")           # tag_dropdown
+        )
     file_input.upload(on_upload, inputs=file_input,
+                      outputs=[full_df_state, has_media_state, preview_table, status, eta_box, extract_root_state, tag_dropdown])
+    def on_search(term, df, has_media, mode, search_in, selected_tag):
+        if df is None: return None, "No data"
+        filtered_df = df.copy()
+        # Apply tag filter first
+        if selected_tag and selected_tag != "All":
+            filtered_df = filtered_df[filtered_df['Tags'].str.contains(selected_tag, na=False, case=False)]
+        # Apply text search
+        if term:
+            if search_in == "Question Only":
+                mask = filtered_df['Question'].str.contains(term, case=False, na=False)
+            elif search_in == "Answer Only":
+                mask = filtered_df['Answer'].str.contains(term, case=False, na=False)
+            else:  # Both
+                mask = filtered_df.astype(str).apply(lambda x: x.str.contains(term, case=False, na=False)).any(axis=1)
+            filtered_df = filtered_df[mask]
+        # Create cleaned display version
+        display_df = filtered_df.copy()
+        display_df['Question'] = display_df['Question'].apply(strip_html_for_display)
+        display_df['Answer'] = display_df['Answer'].apply(strip_html_for_display)
+        return display_df.head(PREVIEW_LIMIT), estimate_time(len(filtered_df), has_media, mode)
+    search_box.change(on_search, inputs=[search_box, full_df_state, has_media_state, mode_radio, search_field, tag_dropdown],
+                     outputs=[preview_table, eta_box])
+    search_field.change(on_search, inputs=[search_box, full_df_state, has_media_state, mode_radio, search_field, tag_dropdown],
+                     outputs=[preview_table, eta_box])
+    tag_dropdown.change(on_search, inputs=[search_box, full_df_state, has_media_state, mode_radio, search_field, tag_dropdown],
+                     outputs=[preview_table, eta_box])
+    mode_radio.change(on_search, inputs=[search_box, full_df_state, has_media_state, mode_radio, search_field, tag_dropdown],
+                     outputs=[preview_table, eta_box])
     btn.click(process_dataframe,
               inputs=[full_df_state, search_box, extract_root_state, mode_radio],