gmedin committed on
Commit
fa4b92a
·
verified ·
1 Parent(s): 50d25e2

Error related to docx filenames

Browse files
Files changed (1) hide show
  1. app.py +45 -1
app.py CHANGED
@@ -365,6 +365,46 @@ def strip_html_tags(text):
365
  text = re.sub(r'</span>', '', text)
366
  return text
367
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  def create_docx_file(lessons_data, lesson_topic, lesson_length, model_name):
369
  """
370
  Creates a DOCX file from a sequence of lessons.
@@ -646,10 +686,14 @@ else:
646
  st.session_state.lesson_length,
647
  model_name
648
  )
 
 
 
 
649
  st.download_button(
650
  label=f"Download {model_name} (DOCX)",
651
  data=docx_file,
652
- file_name=f"{model_name.replace(' ', '_')}_lesson_sequence_{st.session_state.lesson_topic.replace(' ', '_')}.docx",
653
  mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
654
  key=f"download_docx_{model_name}"
655
  )
 
365
  text = re.sub(r'</span>', '', text)
366
  return text
367
 
368
def sanitize_filename(text, max_length=100):
    """
    Sanitizes text to create a safe filename for both filesystems and HTTP headers.

    Newlines, tabs and other control whitespace are removed (they are what make
    a Content-Disposition header value invalid), punctuation is dropped, and
    words are joined with single underscores.

    Args:
        text: The text to sanitize
        max_length: Maximum length for the sanitized text (default 100).
            Negative values are treated as 0.

    Returns:
        A sanitized string safe for use in filenames and HTTP Content-Disposition
        headers, or "lesson" when the input is empty or nothing usable remains.
    """
    if not text:
        return "lesson"

    # Keep only word characters, whitespace and hyphens. Note that `\w` is
    # Unicode-aware for str patterns, so accented/non-Latin letters survive.
    text = re.sub(r'[^\w\s\-]', '', text)

    # Collapse separators AFTER removing punctuation; doing it before leaves
    # doubled underscores (e.g. "Tom & Jerry" -> "Tom__Jerry").
    text = re.sub(r'[\s_]+', '_', text).strip('_')

    max_length = max(0, max_length)
    if len(text) > max_length:
        truncated = text[:max_length]
        # Only back up to the previous separator when the cut landed inside a
        # word; if the next character is a separator the last word is complete.
        if text[max_length] != '_' and '_' in truncated:
            truncated = truncated.rsplit('_', 1)[0]
        text = truncated.strip('_')

    # Ensure we always return a non-empty name
    return text or "lesson"
407
+
408
  def create_docx_file(lessons_data, lesson_topic, lesson_length, model_name):
409
  """
410
  Creates a DOCX file from a sequence of lessons.
 
686
  st.session_state.lesson_length,
687
  model_name
688
  )
689
+ # Sanitize filename components
690
+ safe_model_name = sanitize_filename(model_name, max_length=50)
691
+ safe_topic = sanitize_filename(st.session_state.lesson_topic, max_length=80)
692
+
693
  st.download_button(
694
  label=f"Download {model_name} (DOCX)",
695
  data=docx_file,
696
+ file_name=f"{safe_model_name}_lesson_sequence_{safe_topic}.docx",
697
  mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
698
  key=f"download_docx_{model_name}"
699
  )