Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files
app.py
CHANGED
|
@@ -118,6 +118,33 @@ def generate_audio_for_row(q_text, a_text, idx, tmpdir, mode):
|
|
| 118 |
|
| 119 |
return q_out, a_out
|
| 120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
def parse_file(file_obj):
|
| 122 |
if file_obj is None:
|
| 123 |
return None, None, None, "No file uploaded", "", None
|
|
@@ -137,6 +164,7 @@ def parse_file(file_obj):
|
|
| 137 |
|
| 138 |
df = df.iloc[:, :2]
|
| 139 |
df.columns = ["Question", "Answer"]
|
|
|
|
| 140 |
|
| 141 |
elif ext == ".apkg" or ext == ".zip":
|
| 142 |
# Extract to a PERSISTENT temp dir (passed to state)
|
|
@@ -155,17 +183,18 @@ def parse_file(file_obj):
|
|
| 155 |
|
| 156 |
conn = sqlite3.connect(col_path)
|
| 157 |
cur = conn.cursor()
|
| 158 |
-
cur.execute("SELECT flds FROM notes")
|
| 159 |
rows = cur.fetchall()
|
| 160 |
|
| 161 |
data = []
|
| 162 |
for r in rows:
|
| 163 |
flds = r[0].split('\x1f')
|
|
|
|
| 164 |
q = flds[0] if len(flds) > 0 else ""
|
| 165 |
a = flds[1] if len(flds) > 1 else ""
|
| 166 |
-
data.append([q, a])
|
| 167 |
|
| 168 |
-
df = pd.DataFrame(data, columns=["Question", "Answer"])
|
| 169 |
conn.close()
|
| 170 |
|
| 171 |
else:
|
|
@@ -177,18 +206,38 @@ def parse_file(file_obj):
|
|
| 177 |
if has_media:
|
| 178 |
msg += " 🎵 Existing media detected."
|
| 179 |
|
| 180 |
-
return df, has_media, df.head(PREVIEW_LIMIT), msg, estimate_time(len(df)), extract_root
|
| 181 |
|
| 182 |
except Exception as e:
|
| 183 |
if extract_root and os.path.exists(extract_root):
|
| 184 |
shutil.rmtree(extract_root)
|
| 185 |
return None, None, None, f"Error: {str(e)}", "", None
|
| 186 |
|
| 187 |
-
def estimate_time(num_cards):
|
| 188 |
-
"""
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
def process_dataframe(df_full, search_term, extract_root, mode, progress=gr.Progress()):
|
| 194 |
if df_full is None or len(df_full) == 0:
|
|
@@ -240,8 +289,8 @@ def process_dataframe(df_full, search_term, extract_root, mode, progress=gr.Prog
|
|
| 240 |
fields=[{'name': 'Question'}, {'name': 'Answer'}],
|
| 241 |
templates=[{
|
| 242 |
'name': 'Card 1',
|
| 243 |
-
'qfmt': '{{Question}}
|
| 244 |
-
'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}
|
| 245 |
}])
|
| 246 |
my_deck = genanki.Deck(random.randrange(1 << 30, 1 << 31), 'Pocket TTS Deck')
|
| 247 |
|
|
@@ -347,8 +396,20 @@ with gr.Blocks(title="Pocket TTS Anki") as app:
|
|
| 347 |
eta_box = gr.Textbox(label="Est. Time", interactive=False)
|
| 348 |
|
| 349 |
with gr.Row():
|
| 350 |
-
search_box = gr.Textbox(label="
|
| 351 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
# New 3-Way Toggle
|
| 353 |
mode_radio = gr.Radio(
|
| 354 |
choices=[
|
|
@@ -360,7 +421,11 @@ with gr.Blocks(title="Pocket TTS Anki") as app:
|
|
| 360 |
label="Generation Mode"
|
| 361 |
)
|
| 362 |
|
| 363 |
-
preview_table = gr.Dataframe(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
|
| 365 |
with gr.Row():
|
| 366 |
btn = gr.Button("🚀 Generate Deck", variant="primary")
|
|
@@ -368,21 +433,73 @@ with gr.Blocks(title="Pocket TTS Anki") as app:
|
|
| 368 |
|
| 369 |
result_lbl = gr.Textbox(label="Result", interactive=False)
|
| 370 |
|
|
|
|
|
|
|
| 371 |
def on_upload(file):
|
| 372 |
# Returns: df, has_media, preview, msg, eta, extract_path
|
| 373 |
-
df,
|
| 374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
|
| 376 |
file_input.upload(on_upload, inputs=file_input,
|
| 377 |
-
outputs=[full_df_state, preview_table, status, eta_box, extract_root_state])
|
| 378 |
|
| 379 |
-
def on_search(term, df):
|
| 380 |
-
if df is None: return None
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
|
| 385 |
-
search_box.change(on_search, inputs=[search_box, full_df_state],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
|
| 387 |
btn.click(process_dataframe,
|
| 388 |
inputs=[full_df_state, search_box, extract_root_state, mode_radio],
|
|
|
|
| 118 |
|
| 119 |
return q_out, a_out
|
| 120 |
|
| 121 |
+
def strip_html_for_display(text):
    """Return a plain-text, length-limited version of *text* for the preview.

    HTML tags are removed first and character references are decoded
    afterwards, so an escaped literal such as ``&lt;b&gt;`` stays visible as
    ``<b>`` instead of being stripped as a tag.

    Args:
        text: Cell value to clean; may be ``None``/NaN or an empty string.

    Returns:
        The cleaned string with surrounding whitespace removed, cut to 200
        characters plus ``'...'`` when longer, or ``""`` for missing input.
    """
    import html  # stdlib; imported locally so this block stays self-contained

    if pd.isna(text) or text == "":
        return ""

    text = str(text)
    # Remove HTML tags
    text = re.sub(r'<[^>]+>', '', text)
    # Decode every HTML character reference (named and numeric). A
    # non-breaking space is mapped to a regular space for display.
    text = html.unescape(text).replace('\xa0', ' ')
    # Strip before measuring so surrounding whitespace does not consume the
    # display budget.
    text = text.strip()
    # Limit length for display
    if len(text) > 200:
        text = text[:200] + '...'
    return text
|
| 133 |
+
|
| 134 |
+
def extract_unique_tags(df):
    """Collect the distinct tags found in the ``Tags`` column of *df*.

    Args:
        df: DataFrame that may carry a ``Tags`` column holding space-separated
            tag strings; may be ``None``.

    Returns:
        A list starting with ``"All"`` followed by the unique tags in sorted
        order. Only ``["All"]`` is returned when *df* is ``None`` or has no
        ``Tags`` column.
    """
    if df is None or 'Tags' not in df.columns:
        return ["All"]

    unique_tags = set()
    for tag_str in df['Tags']:
        # Missing cells can surface as None or NaN, and NaN is truthy, so a
        # plain truthiness test is not enough: only real strings are split.
        if isinstance(tag_str, str):
            # Tags are space-separated, e.g., " MK_MathematicsKnowledge ".
            # split() without arguments already drops surrounding whitespace
            # and empty tokens.
            unique_tags.update(tag_str.split())

    return ["All"] + sorted(unique_tags)
|
| 147 |
+
|
| 148 |
def parse_file(file_obj):
|
| 149 |
if file_obj is None:
|
| 150 |
return None, None, None, "No file uploaded", "", None
|
|
|
|
| 164 |
|
| 165 |
df = df.iloc[:, :2]
|
| 166 |
df.columns = ["Question", "Answer"]
|
| 167 |
+
df['Tags'] = "" # CSV files don't have tags
|
| 168 |
|
| 169 |
elif ext == ".apkg" or ext == ".zip":
|
| 170 |
# Extract to a PERSISTENT temp dir (passed to state)
|
|
|
|
| 183 |
|
| 184 |
conn = sqlite3.connect(col_path)
|
| 185 |
cur = conn.cursor()
|
| 186 |
+
cur.execute("SELECT flds, tags FROM notes")
|
| 187 |
rows = cur.fetchall()
|
| 188 |
|
| 189 |
data = []
|
| 190 |
for r in rows:
|
| 191 |
flds = r[0].split('\x1f')
|
| 192 |
+
tags = r[1].strip() if len(r) > 1 else ""
|
| 193 |
q = flds[0] if len(flds) > 0 else ""
|
| 194 |
a = flds[1] if len(flds) > 1 else ""
|
| 195 |
+
data.append([q, a, tags])
|
| 196 |
|
| 197 |
+
df = pd.DataFrame(data, columns=["Question", "Answer", "Tags"])
|
| 198 |
conn.close()
|
| 199 |
|
| 200 |
else:
|
|
|
|
| 206 |
if has_media:
|
| 207 |
msg += " 🎵 Existing media detected."
|
| 208 |
|
| 209 |
+
return df, has_media, df.head(PREVIEW_LIMIT), msg, estimate_time(len(df), has_media), extract_root
|
| 210 |
|
| 211 |
except Exception as e:
|
| 212 |
if extract_root and os.path.exists(extract_root):
|
| 213 |
shutil.rmtree(extract_root)
|
| 214 |
return None, None, None, f"Error: {str(e)}", "", None
|
| 215 |
|
| 216 |
+
def estimate_time(num_cards, has_existing_media=False, mode="Smart Fill (Preserve Existing)", seconds_per_card=4.7):
    """Return a human-readable estimate of the audio generation time.

    Estimate based on benchmark: ~4.7s per card for full generation.
    Adjusts for Smart Fill mode when existing media is present.

    Args:
        num_cards: Number of cards to process. ``None``, zero and negative
            values all yield ``"0s"``.
        has_existing_media: Whether the uploaded deck already contains media.
        mode: Selected generation mode label; may be ``None``. The Smart Fill
            discount applies only when the label contains ``"Smart Fill"``.
        seconds_per_card: Benchmark cost of generating one card from scratch.

    Returns:
        A string such as ``"~47s"``, ``"~7 min"``, ``"~1h 18m"`` or ``"~2h"``.
    """
    # Guard against missing or nonsensical counts instead of printing a
    # negative duration.
    if not num_cards or num_cards <= 0:
        return "0s"

    # If using Smart Fill with existing media, assume ~50% speedup (many cards
    # already have audio). `mode` is checked for truthiness first because the
    # membership test would raise on None.
    if has_existing_media and mode and "Smart Fill" in mode:
        seconds_per_card *= 0.5

    seconds = num_cards * seconds_per_card

    if seconds < 60:
        return f"~{int(seconds)}s"
    if seconds < 3600:
        return f"~{int(seconds / 60)} min"

    hours = int(seconds / 3600)
    mins = int((seconds % 3600) / 60)
    return f"~{hours}h {mins}m" if mins > 0 else f"~{hours}h"
|
| 241 |
|
| 242 |
def process_dataframe(df_full, search_term, extract_root, mode, progress=gr.Progress()):
|
| 243 |
if df_full is None or len(df_full) == 0:
|
|
|
|
| 289 |
fields=[{'name': 'Question'}, {'name': 'Answer'}],
|
| 290 |
templates=[{
|
| 291 |
'name': 'Card 1',
|
| 292 |
+
'qfmt': '{{Question}}',
|
| 293 |
+
'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}',
|
| 294 |
}])
|
| 295 |
my_deck = genanki.Deck(random.randrange(1 << 30, 1 << 31), 'Pocket TTS Deck')
|
| 296 |
|
|
|
|
| 396 |
eta_box = gr.Textbox(label="Est. Time", interactive=False)
|
| 397 |
|
| 398 |
with gr.Row():
|
| 399 |
+
search_box = gr.Textbox(label="Search Text", placeholder="Enter text to search...")
|
| 400 |
+
search_field = gr.Radio(
|
| 401 |
+
choices=["Both", "Question Only", "Answer Only"],
|
| 402 |
+
value="Both",
|
| 403 |
+
label="Search In"
|
| 404 |
+
)
|
| 405 |
+
tag_dropdown = gr.Dropdown(
|
| 406 |
+
label="Filter by Tag",
|
| 407 |
+
choices=["All"],
|
| 408 |
+
value="All",
|
| 409 |
+
interactive=True
|
| 410 |
+
)
|
| 411 |
+
|
| 412 |
+
with gr.Row():
|
| 413 |
# New 3-Way Toggle
|
| 414 |
mode_radio = gr.Radio(
|
| 415 |
choices=[
|
|
|
|
| 421 |
label="Generation Mode"
|
| 422 |
)
|
| 423 |
|
| 424 |
+
preview_table = gr.Dataframe(
|
| 425 |
+
label="Preview (First 100)",
|
| 426 |
+
interactive=False,
|
| 427 |
+
column_widths=["30%", "45%", "25%"]
|
| 428 |
+
)
|
| 429 |
|
| 430 |
with gr.Row():
|
| 431 |
btn = gr.Button("🚀 Generate Deck", variant="primary")
|
|
|
|
| 433 |
|
| 434 |
result_lbl = gr.Textbox(label="Result", interactive=False)
|
| 435 |
|
| 436 |
+
has_media_state = gr.State(False)
|
| 437 |
+
|
| 438 |
def on_upload(file):
    """Parse a newly uploaded file and build the values for the bound outputs.

    Args:
        file: The uploaded file object handed over by the upload event.

    Returns:
        A 7-tuple in the order of the outputs wired to this handler: full
        DataFrame, has-media flag, cleaned preview, status message, time
        estimate, extraction path, and a refreshed tag dropdown.
    """
    # parse_file returns: df, has_media, preview, msg, eta, extract_path
    df, has_media, preview, msg, eta, ext_path = parse_file(file)

    # Tag choices fall back to ["All"] when parsing failed (df is None).
    tag_choices = extract_unique_tags(df)

    if df is not None:
        # Clean only the rows that are actually displayed. Running the HTML
        # stripping over the whole deck just to keep the first PREVIEW_LIMIT
        # rows is wasted work on large decks. The copy keeps the stored
        # DataFrame untouched.
        clean_preview = df.head(PREVIEW_LIMIT).copy()
        clean_preview['Question'] = clean_preview['Question'].apply(strip_html_for_display)
        clean_preview['Answer'] = clean_preview['Answer'].apply(strip_html_for_display)
    else:
        clean_preview = preview

    return (
        df,             # full_df_state
        has_media,      # has_media_state
        clean_preview,  # preview_table
        msg,            # status
        eta,            # eta_box
        ext_path,       # extract_root_state
        gr.Dropdown(choices=tag_choices, value="All"),  # tag_dropdown
    )
|
| 462 |
|
| 463 |
file_input.upload(on_upload, inputs=file_input,
|
| 464 |
+
outputs=[full_df_state, has_media_state, preview_table, status, eta_box, extract_root_state, tag_dropdown])
|
| 465 |
|
| 466 |
+
def on_search(term, df, has_media, mode, search_in, selected_tag):
    """Filter the deck by tag and text and refresh the preview and estimate.

    Args:
        term: Text typed into the search box; matched literally and
            case-insensitively. Empty or ``None`` disables the text filter.
        df: Full DataFrame held in state, or ``None`` when nothing is loaded.
        has_media: Whether the uploaded deck already contains media.
        mode: Selected generation mode label, forwarded to ``estimate_time``.
        search_in: ``"Question Only"``, ``"Answer Only"`` or anything else for
            both text columns.
        selected_tag: Tag chosen in the dropdown; ``"All"`` or empty disables
            the tag filter.

    Returns:
        ``(preview, eta)``: the first ``PREVIEW_LIMIT`` matching rows with HTML
        stripped from Question/Answer, and the time estimate for all matching
        rows. ``(None, "No data")`` when no deck is loaded.
    """
    if df is None:
        return None, "No data"

    # Boolean indexing below always yields new frames, so the stored
    # DataFrame is never mutated and no up-front copy is needed.
    filtered_df = df

    # Apply tag filter first
    if selected_tag and selected_tag != "All" and 'Tags' in filtered_df.columns:
        wanted = selected_tag.casefold()
        # Match whole tags only: a substring test would let "Math" select
        # cards tagged "Mathematics", and a regex test would misread tags
        # containing characters such as "+" or ".".
        # astype(bool) keeps the mask boolean even when the frame is empty.
        tag_mask = filtered_df['Tags'].apply(
            lambda tag_str: isinstance(tag_str, str) and wanted in tag_str.casefold().split()
        ).astype(bool)
        filtered_df = filtered_df[tag_mask]

    # Apply text search
    if term:
        if search_in == "Question Only":
            columns = ['Question']
        elif search_in == "Answer Only":
            columns = ['Answer']
        else:  # Both: the two text columns; tags have their own filter
            columns = ['Question', 'Answer']

        text_mask = None
        for column in columns:
            # fillna/astype(str) tolerate missing and non-string cells.
            # regex=False treats the user's text literally, so input such as
            # "(" or "C++" cannot raise a regex compilation error.
            hits = filtered_df[column].fillna('').astype(str).str.contains(
                term, case=False, na=False, regex=False
            )
            text_mask = hits if text_mask is None else (text_mask | hits)
        filtered_df = filtered_df[text_mask]

    # Create cleaned display version, touching only the rows that are shown.
    display_df = filtered_df.head(PREVIEW_LIMIT).copy()
    display_df['Question'] = display_df['Question'].apply(strip_html_for_display)
    display_df['Answer'] = display_df['Answer'].apply(strip_html_for_display)

    return display_df, estimate_time(len(filtered_df), has_media, mode)
|
| 491 |
|
| 492 |
+
# Every filter control re-runs the same search handler with the same inputs,
# so the preview table and the time estimate always reflect the current
# combination of search text, search field, tag and generation mode.
search_inputs = [search_box, full_df_state, has_media_state, mode_radio, search_field, tag_dropdown]
search_outputs = [preview_table, eta_box]
for filter_control in (search_box, search_field, tag_dropdown, mode_radio):
    filter_control.change(on_search, inputs=search_inputs, outputs=search_outputs)
|
| 503 |
|
| 504 |
btn.click(process_dataframe,
|
| 505 |
inputs=[full_df_state, search_box, extract_root_state, mode_radio],
|