adelevett committed on
Commit
0eb1322
·
verified ·
1 Parent(s): 6f37f73

Upload 4 files

Browse files
Files changed (1) hide show
  1. app.py +140 -23
app.py CHANGED
@@ -118,6 +118,33 @@ def generate_audio_for_row(q_text, a_text, idx, tmpdir, mode):
118
 
119
  return q_out, a_out
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  def parse_file(file_obj):
122
  if file_obj is None:
123
  return None, None, None, "No file uploaded", "", None
@@ -137,6 +164,7 @@ def parse_file(file_obj):
137
 
138
  df = df.iloc[:, :2]
139
  df.columns = ["Question", "Answer"]
 
140
 
141
  elif ext == ".apkg" or ext == ".zip":
142
  # Extract to a PERSISTENT temp dir (passed to state)
@@ -155,17 +183,18 @@ def parse_file(file_obj):
155
 
156
  conn = sqlite3.connect(col_path)
157
  cur = conn.cursor()
158
- cur.execute("SELECT flds FROM notes")
159
  rows = cur.fetchall()
160
 
161
  data = []
162
  for r in rows:
163
  flds = r[0].split('\x1f')
 
164
  q = flds[0] if len(flds) > 0 else ""
165
  a = flds[1] if len(flds) > 1 else ""
166
- data.append([q, a])
167
 
168
- df = pd.DataFrame(data, columns=["Question", "Answer"])
169
  conn.close()
170
 
171
  else:
@@ -177,18 +206,38 @@ def parse_file(file_obj):
177
  if has_media:
178
  msg += " 🎵 Existing media detected."
179
 
180
- return df, has_media, df.head(PREVIEW_LIMIT), msg, estimate_time(len(df)), extract_root
181
 
182
  except Exception as e:
183
  if extract_root and os.path.exists(extract_root):
184
  shutil.rmtree(extract_root)
185
  return None, None, None, f"Error: {str(e)}", "", None
186
 
187
- def estimate_time(num_cards):
188
- """Rough estimate: 2s per card"""
189
- seconds = num_cards * 2.0
190
- if seconds < 60: return f"~{int(seconds)}s"
191
- return f"~{int(seconds/60)} min"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
  def process_dataframe(df_full, search_term, extract_root, mode, progress=gr.Progress()):
194
  if df_full is None or len(df_full) == 0:
@@ -240,8 +289,8 @@ def process_dataframe(df_full, search_term, extract_root, mode, progress=gr.Prog
240
  fields=[{'name': 'Question'}, {'name': 'Answer'}],
241
  templates=[{
242
  'name': 'Card 1',
243
- 'qfmt': '{{Question}}<br>{{AudioQ}}',
244
- 'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}<br>{{AudioA}}',
245
  }])
246
  my_deck = genanki.Deck(random.randrange(1 << 30, 1 << 31), 'Pocket TTS Deck')
247
 
@@ -347,8 +396,20 @@ with gr.Blocks(title="Pocket TTS Anki") as app:
347
  eta_box = gr.Textbox(label="Est. Time", interactive=False)
348
 
349
  with gr.Row():
350
- search_box = gr.Textbox(label="Filter (Optional)", placeholder="Process subset...")
351
-
 
 
 
 
 
 
 
 
 
 
 
 
352
  # New 3-Way Toggle
353
  mode_radio = gr.Radio(
354
  choices=[
@@ -360,7 +421,11 @@ with gr.Blocks(title="Pocket TTS Anki") as app:
360
  label="Generation Mode"
361
  )
362
 
363
- preview_table = gr.Dataframe(label="Preview (First 100)", interactive=False)
 
 
 
 
364
 
365
  with gr.Row():
366
  btn = gr.Button("🚀 Generate Deck", variant="primary")
@@ -368,21 +433,73 @@ with gr.Blocks(title="Pocket TTS Anki") as app:
368
 
369
  result_lbl = gr.Textbox(label="Result", interactive=False)
370
 
 
 
371
  def on_upload(file):
372
  # Returns: df, has_media, preview, msg, eta, extract_path
373
- df, _, preview, msg, eta, ext_path = parse_file(file)
374
- return df, preview, msg, eta, ext_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
 
376
  file_input.upload(on_upload, inputs=file_input,
377
- outputs=[full_df_state, preview_table, status, eta_box, extract_root_state])
378
 
379
- def on_search(term, df):
380
- if df is None: return None
381
- if not term: return df.head(PREVIEW_LIMIT)
382
- mask = df.astype(str).apply(lambda x: x.str.contains(term, case=False)).any(axis=1)
383
- return df[mask].head(PREVIEW_LIMIT)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
 
385
- search_box.change(on_search, inputs=[search_box, full_df_state], outputs=preview_table)
 
 
 
 
 
 
 
 
 
 
386
 
387
  btn.click(process_dataframe,
388
  inputs=[full_df_state, search_box, extract_root_state, mode_radio],
 
118
 
119
  return q_out, a_out
120
 
121
def strip_html_for_display(text):
    """Return a plain-text, length-limited version of *text* for the preview.

    HTML tags are removed, character references are decoded, surrounding
    whitespace is trimmed, and anything longer than 200 characters is cut
    and suffixed with ``...``.

    Args:
        text: Raw field value. May be ``None``/NaN, an empty string, or any
            object that can be converted with ``str``.

    Returns:
        The cleaned string, or ``""`` when the input is missing or empty.
    """
    # Imported locally so the function does not rely on a module-level import.
    import html

    if pd.isna(text) or text == "":
        return ""

    # Drop tags first so that escaped markup such as "&lt;b&gt;" is shown
    # literally instead of being stripped as a tag after decoding.
    plain = re.sub(r'<[^>]+>', '', str(text))

    # html.unescape decodes every named/numeric reference in a single pass
    # (so "&amp;lt;" becomes "&lt;", not "<"). A non-breaking space is
    # rendered as an ordinary space for readability.
    plain = html.unescape(plain).replace('\xa0', ' ')

    # Trim before measuring so leading whitespace does not consume the
    # display budget.
    plain = plain.strip()
    if len(plain) > 200:
        plain = plain[:200] + '...'
    return plain
133
+
134
def extract_unique_tags(df):
    """Collect the distinct tags found in the ``Tags`` column of *df*.

    Each cell is treated as a whitespace-separated list of tags
    (e.g. ``" MK_MathematicsKnowledge "``).

    Args:
        df: DataFrame that may contain a ``Tags`` column, or ``None``.

    Returns:
        A list starting with ``"All"`` followed by the unique tags in sorted
        order. Only ``["All"]`` when *df* is ``None`` or has no ``Tags``
        column.
    """
    if df is None or 'Tags' not in df.columns:
        return ["All"]

    unique_tags = set()
    for cell in df['Tags']:
        # Skip missing values (None/NaN) and any other non-string cell; NaN is
        # truthy, so a plain truthiness test would let it reach .split().
        if isinstance(cell, str):
            # str.split() with no argument already discards empty pieces.
            unique_tags.update(cell.split())

    return ["All", *sorted(unique_tags)]
147
+
148
  def parse_file(file_obj):
149
  if file_obj is None:
150
  return None, None, None, "No file uploaded", "", None
 
164
 
165
  df = df.iloc[:, :2]
166
  df.columns = ["Question", "Answer"]
167
+ df['Tags'] = "" # CSV files don't have tags
168
 
169
  elif ext == ".apkg" or ext == ".zip":
170
  # Extract to a PERSISTENT temp dir (passed to state)
 
183
 
184
  conn = sqlite3.connect(col_path)
185
  cur = conn.cursor()
186
+ cur.execute("SELECT flds, tags FROM notes")
187
  rows = cur.fetchall()
188
 
189
  data = []
190
  for r in rows:
191
  flds = r[0].split('\x1f')
192
+ tags = r[1].strip() if len(r) > 1 else ""
193
  q = flds[0] if len(flds) > 0 else ""
194
  a = flds[1] if len(flds) > 1 else ""
195
+ data.append([q, a, tags])
196
 
197
+ df = pd.DataFrame(data, columns=["Question", "Answer", "Tags"])
198
  conn.close()
199
 
200
  else:
 
206
  if has_media:
207
  msg += " 🎵 Existing media detected."
208
 
209
+ return df, has_media, df.head(PREVIEW_LIMIT), msg, estimate_time(len(df), has_media), extract_root
210
 
211
  except Exception as e:
212
  if extract_root and os.path.exists(extract_root):
213
  shutil.rmtree(extract_root)
214
  return None, None, None, f"Error: {str(e)}", "", None
215
 
216
def estimate_time(num_cards, has_existing_media=False, mode="Smart Fill (Preserve Existing)",
                  *, seconds_per_card=4.7):
    """Format a rough processing-time estimate for *num_cards* cards.

    Args:
        num_cards: Number of cards that would be processed.
        has_existing_media: Whether the uploaded deck already contains media.
        mode: Selected generation mode label; may be ``None`` when nothing is
            selected.
        seconds_per_card: Cost of one card for full audio generation
            (default 4.7s, the benchmark figure).

    Returns:
        ``"0s"`` for a non-positive count, otherwise ``"~Ns"``, ``"~N min"``,
        ``"~Hh Mm"`` or ``"~Hh"`` depending on the magnitude.
    """
    if num_cards <= 0:
        return "0s"

    per_card = seconds_per_card
    # With Smart Fill and existing media, assume about half of the work is
    # skipped because many cards already have audio. The isinstance check
    # keeps a missing mode (None) from raising on the "in" test.
    if has_existing_media and isinstance(mode, str) and "Smart Fill" in mode:
        per_card *= 0.5

    total_seconds = int(num_cards * per_card)

    if total_seconds < 60:
        return f"~{total_seconds}s"

    total_minutes = total_seconds // 60
    if total_seconds < 3600:
        return f"~{total_minutes} min"

    hours, mins = divmod(total_minutes, 60)
    return f"~{hours}h {mins}m" if mins > 0 else f"~{hours}h"
241
 
242
  def process_dataframe(df_full, search_term, extract_root, mode, progress=gr.Progress()):
243
  if df_full is None or len(df_full) == 0:
 
289
  fields=[{'name': 'Question'}, {'name': 'Answer'}],
290
  templates=[{
291
  'name': 'Card 1',
292
+ 'qfmt': '{{Question}}',
293
+ 'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}',
294
  }])
295
  my_deck = genanki.Deck(random.randrange(1 << 30, 1 << 31), 'Pocket TTS Deck')
296
 
 
396
  eta_box = gr.Textbox(label="Est. Time", interactive=False)
397
 
398
  with gr.Row():
399
+ search_box = gr.Textbox(label="Search Text", placeholder="Enter text to search...")
400
+ search_field = gr.Radio(
401
+ choices=["Both", "Question Only", "Answer Only"],
402
+ value="Both",
403
+ label="Search In"
404
+ )
405
+ tag_dropdown = gr.Dropdown(
406
+ label="Filter by Tag",
407
+ choices=["All"],
408
+ value="All",
409
+ interactive=True
410
+ )
411
+
412
+ with gr.Row():
413
  # New 3-Way Toggle
414
  mode_radio = gr.Radio(
415
  choices=[
 
421
  label="Generation Mode"
422
  )
423
 
424
+ preview_table = gr.Dataframe(
425
+ label="Preview (First 100)",
426
+ interactive=False,
427
+ column_widths=["30%", "45%", "25%"]
428
+ )
429
 
430
  with gr.Row():
431
  btn = gr.Button("🚀 Generate Deck", variant="primary")
 
433
 
434
  result_lbl = gr.Textbox(label="Result", interactive=False)
435
 
436
+ has_media_state = gr.State(False)
437
+
438
  def on_upload(file):
439
  # Returns: df, has_media, preview, msg, eta, extract_path
440
+ df, has_media, preview, msg, eta, ext_path = parse_file(file)
441
+
442
+ # Extract tags and create cleaned preview
443
+ tag_choices = extract_unique_tags(df)
444
+
445
+ if df is not None:
446
+ display_df = df.copy()
447
+ display_df['Question'] = display_df['Question'].apply(strip_html_for_display)
448
+ display_df['Answer'] = display_df['Answer'].apply(strip_html_for_display)
449
+ clean_preview = display_df.head(PREVIEW_LIMIT)
450
+ else:
451
+ clean_preview = preview
452
+
453
+ return (
454
+ df, # full_df_state
455
+ has_media, # has_media_state
456
+ clean_preview, # preview_table
457
+ msg, # status
458
+ eta, # eta_box
459
+ ext_path, # extract_root_state
460
+ gr.Dropdown(choices=tag_choices, value="All") # tag_dropdown
461
+ )
462
 
463
  file_input.upload(on_upload, inputs=file_input,
464
+ outputs=[full_df_state, has_media_state, preview_table, status, eta_box, extract_root_state, tag_dropdown])
465
 
466
def on_search(term, df, has_media, mode, search_in, selected_tag):
    """Filter the loaded cards and refresh the preview table and ETA.

    Args:
        term: Text typed into the search box; matched literally and
            case-insensitively.
        df: Full DataFrame held in state, or ``None`` before any upload.
        has_media: Whether the uploaded deck contains existing media.
        mode: Selected generation mode label (forwarded to estimate_time).
        search_in: ``"Both"``, ``"Question Only"`` or ``"Answer Only"``.
        selected_tag: Tag chosen in the dropdown; ``"All"`` disables the
            tag filter.

    Returns:
        ``(preview_dataframe, eta_text)``; ``(None, "No data")`` when no
        DataFrame is loaded.
    """
    # NOTE(review): the Generate button passes only the search text to
    # process_dataframe (not the tag or field selection) - confirm that the
    # generated deck is filtered the same way as this preview.
    if df is None:
        return None, "No data"

    filtered_df = df

    # Apply tag filter first. A tag must match a whole whitespace-separated
    # token, so selecting "Math" does not also pull in "MathAdvanced".
    if selected_tag and selected_tag != "All" and 'Tags' in filtered_df.columns:
        wanted = selected_tag.casefold()
        tag_mask = filtered_df['Tags'].apply(
            lambda cell: isinstance(cell, str) and wanted in cell.casefold().split()
        ).astype(bool)  # keep a boolean dtype even when the frame is empty
        filtered_df = filtered_df[tag_mask]

    # Apply text search. "Both" means Question and Answer only - the Tags
    # column is handled by the dropdown above.
    if term:
        if search_in == "Question Only":
            columns = ['Question']
        elif search_in == "Answer Only":
            columns = ['Answer']
        else:  # Both
            columns = ['Question', 'Answer']

        text_mask = pd.Series(False, index=filtered_df.index)
        for column in columns:
            haystack = filtered_df[column].fillna("").astype(str)
            # regex=False: user input such as "(" or "C++" is searched as
            # plain text instead of being compiled as a pattern.
            text_mask |= haystack.str.contains(term, case=False, regex=False).astype(bool)
        filtered_df = filtered_df[text_mask]

    # Clean only the rows that are actually displayed.
    display_df = filtered_df.head(PREVIEW_LIMIT).copy()
    for column in ('Question', 'Answer'):
        display_df[column] = display_df[column].apply(strip_html_for_display)

    return display_df, estimate_time(len(filtered_df), has_media, mode)
491
 
492
# Every control that affects the filtered set or the time estimate re-runs
# on_search with the same inputs and refreshes the same two outputs.
search_inputs = [search_box, full_df_state, has_media_state, mode_radio, search_field, tag_dropdown]
search_outputs = [preview_table, eta_box]

for control in (search_box, search_field, tag_dropdown, mode_radio):
    control.change(on_search, inputs=search_inputs, outputs=search_outputs)
503
 
504
  btn.click(process_dataframe,
505
  inputs=[full_df_state, search_box, extract_root_state, mode_radio],